Modelo Poisson com Zero Individualmente Inflado

Importando as bibliotecas necessárias

require(tidyverse)
require(plotly)
require(knitr)
require(rstan)
require(reshape2)
require(bayesplot)

Gerando os jogos

# Number of clubs taking part in the simulated league.
num_teams <- 20

# Lookup vector: position i holds the display name of the team whose id is i.
team_names <- c(
  "Dragões do Sertão",
  "Atlético Rio Vermelho",
  "Borborema",
  "Guerreiros da Mata",
  "Cacique",
  "Aurora Litorânea",
  "Gávea Azul",
  "Mandacaru United",
  "Capibaribe",
  "Índios Tupiniquins",
  "Atlético Taquara Verde",
  "Seriema",
  "Blumenau City",
  "Iguaçu",
  "Atlético Palmares",
  "Serra Dourada",
  "Sambaqui",
  "Pampa",
  "Riacho do Meio",
  "Sport Club Xingu"
)

# Every ordered (home, away) pairing of team ids: `h` varies slowest and `a`
# cycles fastest.
games <- data.frame(
  h = rep(seq_len(num_teams), each = num_teams),
  a = rep(seq_len(num_teams), times = num_teams)
)

# Drop the pairings in which a team would face itself.
games <- games[games[["h"]] != games[["a"]], , drop = FALSE]

Definindo os parâmetros dos dados gerados:

# Fix the RNG state so the "true" parameters drawn below are reproducible.
set.seed(28)

# Mean and standard deviation of the normal distributions from which the
# per-team attack and defence effects are drawn.
mu_att <- 0
sd_att <- 0.25

mu_def <- 0
sd_def <- 0.25

# Draw order matters for reproducibility: intercept, home effect, attack
# effects, defence effects, then the two sets of zero probabilities.
beta_0 <- rnorm(n = 1, mean = 0, sd = 0.1)
home_effect <- rnorm(n = 1, mean = 0.2, sd = 0.1)
att_effects <- rnorm(n = num_teams, mean = mu_att, sd = sd_att)
def_effects <- rnorm(n = num_teams, mean = mu_def, sd = sd_def)

# Per-team probability of a forced zero score: one value per team for its
# home games and one for its away games.
p_zero_h <- runif(n = num_teams, min = 0, max = 0.3)
p_zero_a <- runif(n = num_teams, min = 0, max = 0.45)
  • \(\beta_{0}\) = -0.1902157
  • \(home\) = 0.1935705
  • \(\mu_{att}\) = 0
  • \(\sigma_{att}\) = 0.25
  • \(\mu_{def}\) = 0
  • \(\sigma_{def}\) = 0.25

Esses foram os efeitos de ataque e defesa gerados:

E essas foram as probabilidades de zero gols geradas:

Gerando os resultados dos jogos

# Fix the RNG state so the simulated scores are reproducible.
set.seed(40)

# Simulate the score of every fixture under the zero-inflated Poisson model.
#
# `games` is a data frame with columns `h` (home-team id) and `a` (away-team
# id). The function reads `beta_0`, `home_effect`, `att_effects`,
# `def_effects`, `p_zero_h` and `p_zero_a` from the enclosing environment.
#
# The scored table (input plus columns `y1` = home goals and `y2` = away goals)
# is written to the variable `games` in the global environment, and is also
# returned invisibly as the value of `assign()`.
simulate_games <- function(games) {
  home_team <- games$h
  away_team <- games$a
  num_games <- length(home_team)

  # Log-scale scoring rates; only the home side receives `home_effect`.
  log_rate_home <- beta_0 + home_effect + att_effects[home_team] + def_effects[away_team]
  log_rate_away <- beta_0 + att_effects[away_team] + def_effects[home_team]

  # Draws are made in this exact order (Poisson then Bernoulli, home side
  # before away side) so that a given seed always yields the same scores.
  home_counts <- rpois(num_games, exp(log_rate_home))
  home_not_zeroed <- rbinom(num_games, 1, (1 - p_zero_h[home_team]))
  away_counts <- rpois(num_games, exp(log_rate_away))
  away_not_zeroed <- rbinom(num_games, 1, (1 - p_zero_a[away_team]))

  # A 0 indicator forces the score to zero regardless of the Poisson draw.
  games$y1 <- home_counts * home_not_zeroed
  games$y2 <- away_counts * away_not_zeroed

  assign("games", games, envir = .GlobalEnv)
}

# Simulate every fixture. simulate_games() writes the scored table (with the
# new columns y1 and y2) back to the global `games`, so the result is not
# captured here.
simulate_games(games)

Análise dos resultados gerados

Alguns resultados que nos dão uma visão geral dos dados são:

  • O time com o melhor ataque da competição foi o Iguaçu balançando as redes 52 vezes.
  • O time com o pior ataque da competição foi o Atlético Rio Vermelho marcando 13 gols.
  • O time com a melhor defesa da competição foi o Atlético Rio Vermelho tendo sofrido um total de 16 gols.
  • O time com a pior defesa da competição foi o Riacho do Meio tendo sofrido um total de 40 gols.
  • A maior goleada do campeonato foi a vitória do Atlético Palmares por 5 a 0 sobre o Capibaribe.
  • O Iguaçu foi o campeão, com 70 pontos.
  • O Sport Club Xingu foi o lanterna, com 32 pontos.
  • O Atlético Rio Vermelho foi o primeiro time fora da zona de rebaixamento, salvando-se com 36 pontos.
Clique para ver a tabela do campeonato completa
Posicao Time Pontos Vitorias Empates Derrotas GM GS SG
1 Iguaçu 70 20 10 8 52 30 22
2 Atlético Taquara Verde 67 18 13 7 39 19 20
3 Atlético Palmares 66 17 15 6 44 29 15
4 Borborema 64 16 16 6 27 24 3
5 Mandacaru United 60 15 15 8 31 25 6
6 Guerreiros da Mata 59 14 17 7 40 26 14
7 Aurora Litorânea 59 15 14 9 30 22 8
8 Gávea Azul 57 15 12 11 35 29 6
9 Serra Dourada 57 14 15 9 24 20 4
10 Pampa 56 15 11 12 36 24 12
11 Cacique 51 13 12 13 39 30 9
12 Dragões do Sertão 43 9 16 13 21 23 -2
13 Seriema 43 11 10 17 23 19 4
14 Capibaribe 39 8 15 15 15 23 -8
15 Riacho do Meio 38 10 8 20 33 40 -7
16 Atlético Rio Vermelho 36 6 18 14 13 16 -3
17 Índios Tupiniquins 36 9 9 20 32 37 -5
18 Sambaqui 36 5 21 12 22 27 -5
19 Blumenau City 33 7 12 19 17 23 -6
20 Sport Club Xingu 32 5 17 16 15 16 -1

Definindo os parâmetros das prioris

# Prior hyperparameters passed to the Stan program as data. The element names
# match the identifiers declared in the Stan `data` block.
hyper_params <- list(
  # Normal prior on the intercept beta_0 (mean, sd).
  beta_0_mu = 0, beta_0_sd = 1000,
  # Normal prior on the home effect (mean, sd).
  home_mu = 0, home_sd = 1000,
  # Means of the normal priors on the attack and defence effects.
  att_mu = 0, def_mu = 0,
  # Cauchy priors on the attack/defence standard deviations (location, scale).
  sd_att_mu = 0, sd_att_sig = 2.5,
  sd_def_mu = 0, sd_def_sig = 2.5,
  # Beta priors on the per-team zero probabilities, home games then away games.
  p_zero_h_alpha = 2, p_zero_h_beta = 8,
  p_zero_a_alpha = 2, p_zero_a_beta = 18
)

Estimando os parâmetros com o STAN

# Assemble the named list that Stan receives as data: the problem sizes, the
# columns of `games` (h, a, y1, y2) and the prior hyperparameters.
# The number of games is taken from the table itself: `num_games` is only a
# local variable inside simulate_games(), so it cannot be relied on here.
data <- c(
  list(ngames = nrow(games), nteams = num_teams),
  as.list(games),
  hyper_params
)

# Compile the Stan program stored next to this document.
model <- stan_model("./models/model.stan")

# Draw from the posterior using two chains run in parallel, `iter` iterations
# each.
iter <- 2000
fit <- sampling(model, data = data, iter = iter, chains = 2, cores = 2)
data {
  int<lower=1> ngames;                    // number of games
  int<lower=1> nteams;                    // number of teams
  // Team ids index vectors of length nteams (att, def, p_zero_h, p_zero_a),
  // so they are validated against [1, nteams] when the data are read.
  int<lower=1, upper=nteams> h[ngames];   // home-team id of each game
  int<lower=1, upper=nteams> a[ngames];   // away-team id of each game
  int<lower=0> y1[ngames];                // goals scored by the home team
  int<lower=0> y2[ngames];                // goals scored by the away team

  // Normal prior on beta_0: mean and standard deviation.
  real beta_0_mu;
  real<lower=0> beta_0_sd;
  // Normal prior on home: mean and standard deviation.
  real home_mu;
  real<lower=0> home_sd;
  // Means of the normal priors on att and def.
  real att_mu;
  real def_mu;
  // Cauchy priors on sd_att and sd_def: location and (positive) scale.
  real sd_att_mu;
  real<lower=0> sd_att_sig;
  real sd_def_mu;
  real<lower=0> sd_def_sig;
  // Beta priors on p_zero_h and p_zero_a: shape parameters must be positive.
  real<lower=0> p_zero_h_alpha;
  real<lower=0> p_zero_h_beta;
  real<lower=0> p_zero_a_alpha;
  real<lower=0> p_zero_a_beta;
}

// Quantities sampled by Stan; their priors are set in the model block.
parameters {
  real beta_0;  // intercept shared by both log-rates
  real home;                 // term added only to the home team's log-rate
  vector[nteams] att;   // per-team attack effect
  vector[nteams] def;  // per-team defence effect
  real<lower=0> sd_att;    // standard deviation of the prior on att
  real<lower=0> sd_def;  // standard deviation of the prior on def
  vector<lower=0, upper=1>[nteams] p_zero_h;  // per-team zero probability, indexed by home team
  vector<lower=0, upper=1>[nteams] p_zero_a;  // per-team zero probability, indexed by away team
}

model {
  // Likelihood: each score is a two-component mixture of a forced zero
  // (probability p_zero) and a Poisson count with a log-linear rate.
  for (g in 1:ngames) {
    // Log-rates of the home and away sides of game g.
    real log_rate_h = beta_0 + home + att[h[g]] + def[a[g]];
    real log_rate_a = beta_0 + att[a[g]] + def[h[g]];

    // Log-probability of the Poisson component producing the observed score.
    real lp_count_h = bernoulli_lpmf(0 | p_zero_h[h[g]])
                      + poisson_log_lpmf(y1[g] | log_rate_h);
    real lp_count_a = bernoulli_lpmf(0 | p_zero_a[a[g]])
                      + poisson_log_lpmf(y2[g] | log_rate_a);

    // Home score: a non-zero score can only come from the Poisson component,
    // whereas a zero may come from either component.
    if (y1[g] != 0) {
      target += lp_count_h;
    }
    else {
      target += log_sum_exp(bernoulli_lpmf(1 | p_zero_h[h[g]]), lp_count_h);
    }

    // Away score: same mixture, using the away team's zero probability.
    if (y2[g] != 0) {
      target += lp_count_a;
    }
    else {
      target += log_sum_exp(bernoulli_lpmf(1 | p_zero_a[a[g]]), lp_count_a);
    }
  }

  // Priors, parameterised by the hyperparameters supplied as data.
  beta_0 ~ normal(beta_0_mu, beta_0_sd);
  home ~ normal(home_mu, home_sd);
  sd_att ~ cauchy(sd_att_mu, sd_att_sig);
  sd_def ~ cauchy(sd_def_mu, sd_def_sig);
  att ~ normal(att_mu, sd_att);
  def ~ normal(def_mu, sd_def);
  p_zero_h ~ beta(p_zero_h_alpha, p_zero_h_beta);
  p_zero_a ~ beta(p_zero_a_alpha, p_zero_a_beta);
}

Traceplots

Autocorrelação

Simulando para 200 campeonatos

Visualizando os valores estimados

Parâmetro 2,5% Mediana Valor Real 97,5%
Dragões do Sertão -0.522407152253587 -0.109152108011113 -0.332791766393282 0.211714672992233
Atlético Rio Vermelho -0.683533064340189 -0.184309211430732 -0.454997916787073 0.1602559671384
Borborema -0.384857403628569 -0.038484190323695 0.0406674213003283 0.268528472130786
Guerreiros da Mata -0.15147021462051 0.122929465915783 0.1328490848347 0.49665459031398
Cacique -0.162678836930818 0.123844201313796 0.0738797404362433 0.473439703363889
Aurora Litorânea -0.380664930994646 -0.0440131617574438 0.00515324433620079 0.276300947123084
Gávea Azul -0.278369860393196 0.00301998140410285 -0.0778164922827419 0.326764002853757
Mandacaru United -0.366678433324156 -0.0409058720917537 0.460370338423976 0.254512949918488
Capibaribe -0.689199061120488 -0.191761423401618 -0.164036612738378 0.131316304389949
Índios Tupiniquins -0.116360798163389 0.194506886382665 0.380091713409793 0.661882430915621
Atlético Taquara Verde -0.191619530714647 0.0988246805416525 0.013489999685528 0.435148778531186
Seriema -0.506112792921042 -0.125063342441857 -0.189269568244125 0.18794865077992
Blumenau City -0.507379439506428 -0.0560850276198738 -0.464708286585779 0.322747132995234
Iguaçu -0.0155115049055688 0.284992865463268 0.269797889360486 0.646109171932368
Atlético Palmares -0.118624918707132 0.157314516077287 0.338818517606009 0.486786838623903
Serra Dourada -0.540539312495563 -0.174220412975678 -0.108861210929644 0.117147666399002
Sambaqui -0.442042469356234 -0.0642481940391308 0.0357470879309517 0.275525304060387
Pampa -0.120299851158099 0.16053316356175 0.0960075880253432 0.559275740742482
Riacho do Meio -0.219615670481921 0.0790203487251408 -0.0482206692388018 0.438157367954048
Sport Club Xingu -0.644339584506739 -0.159014437187439 -0.155325504904303 0.191699644285399
Parâmetro 2,5% Mediana Valor Real 97,5%
Dragões do Sertão -0.324132379444571 -0.0240319965209908 -0.138702392083755 0.280990789103452
Atlético Rio Vermelho -0.288972322506234 0.0151538488331187 0.179518939777232 0.31119297969908
Borborema -0.548238280487966 -0.16761089695183 -0.388408664875364 0.127243870326794
Guerreiros da Mata -0.479344675114894 -0.140650008451315 -0.0985248652079147 0.15033445751558
Cacique -0.143957994960455 0.146083952897262 0.155700269726477 0.478233040670269
Aurora Litorânea -0.555214743457205 -0.179851882662519 -0.415051347170442 0.126079404822465
Gávea Azul -0.326631327916215 -0.0238095576214781 -0.0713231834408563 0.275129545203146
Mandacaru United -0.529790753496902 -0.16626872100526 -0.220736000247786 0.121652691666809
Capibaribe -0.263085123191682 0.0560759028694948 -0.240692873700734 0.364101130986607
Índios Tupiniquins 0.0367979091229049 0.31894030824373 0.444775726868537 0.637710091271504
Atlético Taquara Verde -0.453659977132982 -0.105695638382251 0.0903494111052745 0.20280589027185
Seriema -0.0984849995516224 0.176512765788755 0.407366815001814 0.489951180408347
Blumenau City -0.148105391351347 0.126299204660069 -0.0215053600494572 0.43942076256535
Iguaçu -0.419117975313693 -0.0858727193394585 0.293777436078882 0.210528703297971
Atlético Palmares -0.394773746886075 -0.0551852190211271 -0.1881797295774 0.262894557968275
Serra Dourada -0.607691470162477 -0.208541418066769 -0.205135556887134 0.0837859928289462
Sambaqui -0.199312707677745 0.0926052442105356 0.0907015404513211 0.395788636535009
Pampa -0.303642537027601 0.000560855718158136 -0.154114538132687 0.317946364136153
Riacho do Meio -0.0135340686413304 0.26814242428017 0.134235040982881 0.591170175847091
Sport Club Xingu -0.238324820241502 0.0627661676486354 0.070849122257052 0.374601843849574
Parâmetro 2,5% Mediana Valor Real 97,5%
Dragões do Sertão 0.0435873971259675 0.329353738744747 0.186068264138885 0.633500849298746
Atlético Rio Vermelho 0.0278435959421066 0.298065950779511 0.297637801570818 0.615803205942806
Borborema 0.0647123686495419 0.370926150367783 0.144239081768319 0.662873346896516
Guerreiros da Mata 0.0177084210995142 0.217056001800571 0.272078771120869 0.504435337810125
Cacique 0.0192365350411786 0.233006608029934 0.192107722931541 0.52410414812132
Aurora Litorânea 0.00778130157704609 0.172814572884263 0.0779747299617156 0.477781606126556
Gávea Azul 0.00420180105315424 0.110227545811171 0.0249118954874575 0.380865886431313
Mandacaru United 0.00721692892836691 0.128133364637354 0.225656101712957 0.410865371569554
Capibaribe 0.055725823031064 0.38128095639372 0.295684356009588 0.688412987597847
Índios Tupiniquins 0.129230017404663 0.419135245709634 0.222422846825793 0.686656947171257
Atlético Taquara Verde 0.00411065384524426 0.088993767842712 0.00536567308008671 0.321789922110437
Seriema 0.00861180375093025 0.185406524889129 0.0143319674301893 0.51468734455877
Blumenau City 0.194125191941912 0.562616587925612 0.0538998482516035 0.815125278463716
Iguaçu 0.00374039974235462 0.0888045351132796 0.12423239259515 0.309116709082287
Atlético Palmares 0.00360842228658006 0.0987315897514858 0.190052686608396 0.339711718477325
Serra Dourada 0.00388776147989358 0.0952181874708516 0.107338871550746 0.336795396415936
Sambaqui 0.0287301512654696 0.275108535750024 0.22365562885534 0.582126001324242
Pampa 0.0247272975734769 0.281988792050236 0.100323365209624 0.556798655902911
Riacho do Meio 0.0829889235314834 0.386729781608806 0.0629933654330671 0.677332351286563
Sport Club Xingu 0.0635268654599548 0.376159931076396 0.293147792830132 0.681749768496467
Parâmetro 2,5% Mediana Valor Real 97,5%
Dragões do Sertão 0.0316459856853146 0.324008471607562 0.286647442833055 0.662204949008665
Atlético Rio Vermelho 0.347725588739451 0.735521346642705 0.415380405855831 0.935766574734407
Borborema 0.00873377703697948 0.133528289396631 0.034975206758827 0.434084176473626
Guerreiros da Mata 0.0074336605919636 0.134418533169167 0.241011042601895 0.416725602218637
Cacique 0.0079432757244589 0.140322425138806 0.00415421485668048 0.448522293702238
Aurora Litorânea 0.00799229888762837 0.203277010963235 0.328394509339705 0.54465806559434
Gávea Azul 0.00661796389291709 0.139846476971296 0.188010889396537 0.440071642089654
Mandacaru United 0.0105883711650606 0.20376879316556 0.29648182222154 0.532028544507811
Capibaribe 0.0461036258388963 0.387312834124987 0.10889605844859 0.699439010304832
Índios Tupiniquins 0.0814960265826134 0.423470684018901 0.447973049618304 0.709651884288299
Atlético Taquara Verde 0.0267863216665601 0.26469422637114 0.234016692568548 0.582380368253029
Seriema 0.0248485336186786 0.315509309474221 0.109025591542013 0.666845578800884
Blumenau City 0.0596049506355894 0.426224113938536 0.4352563221124 0.729085908400134
Iguaçu 0.0166604011397006 0.208895985738173 0.17518308176659 0.497835952186633
Atlético Palmares 0.00770991158730911 0.165609064944195 0.110050908580888 0.472045078453557
Serra Dourada 0.013300733154535 0.239180701698357 0.368514346063603 0.574130848928436
Sambaqui 0.0939191114914859 0.496443720851195 0.163750022824388 0.787397748201434
Pampa 0.0446091790218897 0.35038845380017 0.39024336951552 0.647720911920725
Riacho do Meio 0.00290825662884456 0.0827045771762317 0.314307681180071 0.344500314856779
Sport Club Xingu 0.0960544600933785 0.557352275396635 0.380013904510997 0.836707940112042
Parâmetro 2,5% Mediana Valor Real 97,5%
home 0.0483427834330506 0.245084442388456 0.193570521223964 0.447469530002641
beta_0 -0.378512012945774 -0.142745951809835 -0.190215722053536 0.0729491926780288
sd_att 0.0721599673309169 0.228710346633423 0.25 0.434277544690512
sd_def 0.107033839251571 0.224055108274906 0.25 0.379483749849255